1 STRATAA microbiome analysis

1.1 Imports

library(kableExtra)
library(ggplot2)
library(ggpubr)
library(patchwork)
library(vegan)

1.2 Sources

The file handles are set in config.R as they’re used by both this script and data_cleaning.

source("/Users/flashton/Dropbox/GordonGroup/STRATAA_Microbiome/from_Leo/Leonardos_analysis/bin/core_functions.R")
source("/Users/flashton/Dropbox/GordonGroup/STRATAA_Microbiome/from_Leo/Leonardos_analysis/bin/config.R")

1.3 Metadata basics

First do the plots with kruskall wallis comparison between groups to see if there’s an overall difference, then do the pairwise tests because KW says there is a difference.

metadata <- read_metadata(metadata_handle)

metadata <- metadata %>% mutate(Group = if_else(Group == 'Control_HealthySerosurvey', 'Healthy Control', Group))
metadata <- metadata %>% mutate(Group = if_else(Group == 'Acute_Typhi', 'Acute typhoid', Group))
number_per_country <- metadata %>% group_by(Country) %>% summarise(count = n())

number_per_country %>% kbl() %>% kable_styling()
Country count
Bangladesh 120
Malawi 114
Nepal 76
print(sum(number_per_country$count))
## [1] 310
metadata %>% group_by(Group) %>% summarise(count = n()) %>% kbl() %>% kable_styling()
Group count
Acute typhoid 103
Carrier 110
Healthy Control 97
baseline_chars <- get_baseline_characteristics(metadata)

baseline_chars$pct_anti
## [1] 37.50000  0.00000  0.00000 26.47059  0.00000  0.00000 44.82759  0.00000
## [9]  0.00000
baseline_chars$pct_anti %>% na_if(0)
## [1] 37.50000       NA       NA 26.47059       NA       NA 44.82759       NA
## [9]       NA
# BEWARE - na_if(0.0) will convert ALL 0s to NAs, this is ok as they're only in the antibiotics of carriers and controls
# at the moment, but if others get added in, will screw with it.
baseline_chars %>% rename(`Median age` = median_age, `Women (%)` = pct_fem, `Antibiotics in last 2 weeks (%)` = pct_anti) %>% pivot_longer(!c(Country, Group)) %>% rename(variable_name = name) %>% pivot_wider(names_from = Group, values_from = value) %>% filter(variable_name != 'number') %>% na_if(0.0) %>% kbl(digits = c(NA, NA, 1, 1, 1)) %>% kable_styling()
Country variable_name Acute typhoid Carrier Healthy Control
Bangladesh Median age 6.0 37.0 28.5
Bangladesh Women (%) 47.5 47.5 65.0
Bangladesh Antibiotics in last 2 weeks (%) 37.5 NA NA
Malawi Median age 9.7 32.0 24.0
Malawi Women (%) 64.7 57.5 65.0
Malawi Antibiotics in last 2 weeks (%) 26.5 NA NA
Nepal Median age 17.2 43.9 35.0
Nepal Women (%) 24.1 66.7 82.4
Nepal Antibiotics in last 2 weeks (%) 44.8 NA NA
ggplot(metadata, aes(x = Country, y = Age, fill = Group)) + geom_boxplot() + stat_compare_means(method = 'kruskal.test', label = "p")

comparisons_groups <- list(c("Acute typhoid", "Carrier"), c("Acute typhoid", "Healthy Control"), c("Carrier", "Healthy Control"))
comparisons_countries <- list(c('Bangladesh', 'Malawi'), c('Bangladesh', 'Nepal'), c('Malawi', 'Nepal'))
# default stat method when doing pairwise tests in wilcoxon.
ggboxplot(metadata, facet.by = "Country", y = "Age", x = "Group", color = "Group") + stat_compare_means(comparisons = comparisons_groups) + rremove("x.text") + rremove("xlab") + rremove("x.ticks") # +rotate_x_text(angle = 45)

ggboxplot(metadata, facet.by = "Group", y = "Age", x = "Country", color = "Country") + stat_compare_means(comparisons = comparisons_countries) + rotate_x_text(angle = 45)

country_group_sex <- metadata %>% group_by(Country, Group, Sex) %>% summarise(count = n()) 

plot_sex <- function(eg1, c){
  d <- eg1 %>% filter(Country == c)
  p <- ggplot(d, aes(x = Group, y = count, fill = Sex)) + 
    geom_bar(stat ='identity', position = 'fill') + 
    ylab('Proportion') + 
    ggtitle(c)# +
    #theme(axis.text=element_text(size=34), axis.title=element_text(size=36,face="bold"), plot.title = element_text(size = 40, face = "bold"), legend.key.size = unit(4, 'cm'), legend.title = element_text(size = 34), legend.text = element_text(size = 28))
  
  return(p)
}

m_sex <- plot_sex(country_group_sex, 'Malawi')
b_sex <- plot_sex(country_group_sex, 'Bangladesh')
n_sex <- plot_sex(country_group_sex, 'Nepal')
m_sex / b_sex / n_sex 

1.4 Alpha diversity

meta_for_alpha <- metadata %>% select(isolate, Group, Sex, Country, Antibiotics_taken_before_sampling_yes_no_assumptions, age_bracket)

1.4.1 all three

ANOVA results

anova_all_three_csgaa_res <- read_delim(file.path(output_folder_all_three, '3_alpha', 'shannon.anova.results.tsv'), delim = '\t')
anova_all_three_csgaa_res %>% kbl %>% kable_styling()
rownames(anova_res[[1]]) Df Sum.Sq Mean.Sq F.value Pr..F. is_it_significant
Country:Group 4 11.0251459 2.7562865 8.4327318 0.0000022 significant
Group 2 6.5972886 3.2986443 10.0920507 0.0000615 significant
Country 2 6.1559033 3.0779517 9.4168517 0.0001149 significant
Country:Antibiotics_taken_before_sampling_yes_no_assumptions 2 3.2718745 1.6359372 5.0050748 0.0074085 significant
Country:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 3 3.7981561 1.2660520 3.8734281 0.0098608 significant
Country:Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 2 2.6171373 1.3085686 4.0035056 0.0194634 not_significant
Country:Sex 2 1.4229022 0.7114511 2.1766519 0.1156181 not_significant
Sex:Group 2 1.1109702 0.5554851 1.6994811 0.1849346 not_significant
age_bracket 3 1.5899600 0.5299867 1.6214699 0.1849599 not_significant
Sex:Group:age_bracket 3 1.5836426 0.5278809 1.6150272 0.1864556 not_significant
Antibiotics_taken_before_sampling_yes_no_assumptions 2 1.0007596 0.5003798 1.5308890 0.2184138 not_significant
Country:Group:age_bracket 4 1.6901944 0.4225486 1.2927680 0.2734354 not_significant
Country:Sex:age_bracket 3 1.1382017 0.3794006 1.1607586 0.3253660 not_significant
Country:Sex:Group 4 1.4524154 0.3631039 1.1108995 0.3519447 not_significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 3 0.8900115 0.2966705 0.9076498 0.4379545 not_significant
Group:age_bracket 4 1.0062431 0.2515608 0.7696387 0.5458834 not_significant
Sex 1 0.1057328 0.1057328 0.3234845 0.5700442 not_significant
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 2 0.3545009 0.1772504 0.5422896 0.5821146 not_significant
Sex:age_bracket 3 0.5046598 0.1682199 0.5146611 0.6725503 not_significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 1 0.0243682 0.0243682 0.0745533 0.7850500 not_significant
Country:age_bracket 4 0.5471136 0.1367784 0.4184672 0.7952587 not_significant
Residuals 244 79.7527911 0.3268557 NA NA NA

Plots

all_three_shannon_alpha <- read_delim(file.path(output_folder_all_three, '3_alpha', 'shannon.alpha_results.tsv'))
all_three_shannon_alpha <- left_join(all_three_shannon_alpha, meta_for_alpha, by = 'isolate', )
all_three_shannon_alpha <- all_three_shannon_alpha %>% mutate(Group = if_else(Group == 'Control_HealthySerosurvey', 'Healthy Control', Group))
all_three_shannon_alpha <- all_three_shannon_alpha %>% mutate(Group = if_else(Group == 'Acute_Typhi', 'Acute typhoid', Group)) %>% arrange(Country)


country_comps <- list(c('Bangladesh', 'Malawi'), c('Malawi', 'Nepal'), c('Bangladesh', 'Nepal'))
ggplot(all_three_shannon_alpha, aes(x=Country, y=alpha)) + 
  ggtitle("By country") + 
  labs(x="", y="Alpha diversity") +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.text.y = element_text(size = 12), axis.title = element_text(size = 14, face = 'bold')) +
  stat_compare_means(size = 4, label = "p.format", comparisons = country_comps)

group_comps <- list(c(1, 2), c(2, 3), c(1, 3))
ggplot(all_three_shannon_alpha, aes(x=Group, y=alpha)) + 
  ggtitle("By group") + 
  labs(x="", y="Alpha diversity") +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.text.y = element_text(size = 12), axis.title = element_text(size = 14, face = 'bold')) +
  stat_compare_means(size = 4, label = "p.format", comparisons = group_comps)

  #geom_point(alpha = 0.3, position = "jitter")

country_group_comps <- list(c('Acute typhoid', 'Carrier'), c('Acute typhoid', 'Healthy Control'), c('Healthy Control', 'Carrier'))

ggboxplot(all_three_shannon_alpha, facet.by = "Country", y = "alpha", x = "Group", color = "Group") + stat_compare_means(comparisons = country_group_comps, label = 'p.signif') + rremove("x.text") + rremove("xlab") + rremove("x.ticks") # +rotate_x_text(angle = 45)

ggboxplot(all_three_shannon_alpha, facet.by = "Group", y = "alpha", x = "Country", color = "Country") + stat_compare_means(comparisons = country_comps) + rremove("x.text") + rremove("xlab") + rremove("x.ticks") # +rotate_x_text(angle = 45)

antibiotic_comps <- list(c('Yes', 'No'))
ggboxplot(all_three_shannon_alpha, facet.by = "Country", y = "alpha", x = "Antibiotics_taken_before_sampling_yes_no_assumptions", color = "Antibiotics_taken_before_sampling_yes_no_assumptions", legend.title = 'Antibiotics') + stat_compare_means(comparisons = antibiotic_comps) + rremove("x.text") + rremove("xlab") + rremove("x.ticks")

1.4.2 blantyre

ANOVA

anova_blantyre_sgaa_res <- read_delim(file.path(output_folder_blantyre, '3_alpha', 'shannon.anova.results.tsv'), delim = '\t')
anova_blantyre_sgaa_res %>% kbl %>% kable_styling()
rownames(anova_res[[1]]) Df Sum.Sq Mean.Sq F.value Pr..F. is_it_significant
Group 2 7.9649377 3.9824688 13.3520584 0.0000082 significant
Antibiotics_taken_before_sampling_yes_no_assumptions 1 4.6566930 4.6566930 15.6125356 0.0001533 significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 2 2.9475255 1.4737628 4.9410974 0.0091796 significant
Sex:Group 2 2.1447212 1.0723606 3.5953128 0.0314193 not_significant
Sex:Group:age_bracket 2 2.1337791 1.0668896 3.5769700 0.0319581 not_significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 1 1.2400593 1.2400593 4.1575578 0.0443500 not_significant
age_bracket 2 1.0530543 0.5265272 1.7652923 0.1769464 not_significant
Group:age_bracket 2 0.7980604 0.3990302 1.3378321 0.2675253 not_significant
Sex 1 0.0956136 0.0956136 0.3205644 0.5726623 not_significant
Sex:age_bracket 2 0.2468792 0.1234396 0.4138570 0.6623334 not_significant
Residuals 91 27.1422319 0.2982663 NA NA NA

Plots

  • The p-value on the graph is a kruskal wallis test (which is just wilcoxon i think?)
blantyre_shannon_alpha <- read_delim(file.path(output_folder_blantyre, '3_alpha', 'shannon.alpha_results.tsv'))
blantyre_shannon_alpha <- left_join(blantyre_shannon_alpha, meta_for_alpha, by = 'isolate')

my_comparisons <- list(c('No', 'Yes'), c('No', 'Yes'), c('No', 'Yes'))

ggplot(blantyre_shannon_alpha, aes(x=age_bracket, y=alpha, fill = Antibiotics_taken_before_sampling_yes_no_assumptions)) + 
  #ggtitle("All three countries - country by group") + 
  labs(x="Age bracket", y="Alpha diversity", fill='Antibiotics', color = 'Antibiotics') +
  geom_boxplot(outlier.shape = NA, alpha = 0.2) +
  #geom_point(aes(color=as.factor(Antibiotics_taken_before_sampling_yes_no_assumptions)), alpha = 0.3) +
  geom_point(alpha = 0.5, position=position_dodge(width=0.75),aes(group=Antibiotics_taken_before_sampling_yes_no_assumptions, color=as.factor(Antibiotics_taken_before_sampling_yes_no_assumptions))) +
  stat_compare_means(label.y = c(5.2, 5.5, 5.7), size = 4, label = "p.format")

1.4.3 dhaka

anova_dhaka_sgaa_res <- read_delim(file.path(output_folder_dhaka, '3_alpha', 'shannon.anova.results.tsv'), delim = '\t')
anova_dhaka_sgaa_res %>% kbl %>% kable_styling()
rownames(anova_res[[1]]) Df Sum.Sq Mean.Sq F.value Pr..F. is_it_significant
Group 2 4.5622912 2.2811456 6.0661407 0.0033163 significant
Sex 1 1.4283133 1.4283133 3.7982448 0.0542564 not_significant
Sex:Group:age_bracket 1 0.3891949 0.3891949 1.0349672 0.3115788 not_significant
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 1 0.3424352 0.3424352 0.9106215 0.3423714 not_significant
age_bracket 3 1.0032423 0.3344141 0.8892914 0.4496800 not_significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 3 0.8659569 0.2886523 0.7675992 0.5149717 not_significant
Sex:age_bracket 3 0.8635489 0.2878496 0.7654646 0.5161812 not_significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 1 0.1407568 0.1407568 0.3743078 0.5421269 not_significant
Antibiotics_taken_before_sampling_yes_no_assumptions 1 0.1191690 0.1191690 0.3169003 0.5748026 not_significant
Group:age_bracket 4 0.7582631 0.1895658 0.5041032 0.7327849 not_significant
Sex:Group 2 0.2172723 0.1086362 0.2888909 0.7497496 not_significant
Residuals 95 35.7243333 0.3760456 NA NA NA

1.4.4 kathmandu

anova_kathmandu_sgaa_res <- read_delim(file.path(output_folder_kathmandu, '3_alpha', 'shannon.anova.results.tsv'), delim = '\t')
anova_kathmandu_sgaa_res %>% kbl %>% kable_styling()
rownames(anova_res[[1]]) Df Sum.Sq Mean.Sq F.value Pr..F. is_it_significant
Group 2 2.4148394 1.2074197 3.6926494 0.0309377 not_significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 1 1.2170738 1.2170738 3.7221746 0.0585915 not_significant
Group:age_bracket 2 0.9025969 0.4512985 1.3802052 0.2596639 not_significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 1 0.2589283 0.2589283 0.7918800 0.3772085 not_significant
Sex 1 0.0699282 0.0699282 0.2138614 0.6454878 not_significant
age_bracket 2 0.1213784 0.0606892 0.1856057 0.8310924 not_significant
Sex:age_bracket 1 0.0090645 0.0090645 0.0277218 0.8683436 not_significant
Antibiotics_taken_before_sampling_yes_no_assumptions 2 0.0601359 0.0300680 0.0919568 0.9122773 not_significant
Sex:Group 2 0.0545249 0.0272624 0.0833767 0.9201146 not_significant
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 1 0.0029275 0.0029275 0.0089531 0.9249420 not_significant
Residuals 58 18.9647959 0.3269792 NA NA NA

1.5 Beta diversity

Information on interpretation of PERMANOVA R2 from here - https://forum.qiime2.org/t/group-significance-changes-and-interpretation/2396 , some info on the p-values here https://forum.qiime2.org/t/understanding-beta-group-significance-permanova-results/12648

run_plot_beta <- function(output_folder, meta, vars_to_plot){
  pcoa.data.handle <- file.path(output_folder, '4_beta', "first_2d_coords.txt")
  pcoa.var.handle <- file.path(output_folder, '4_beta', "pcoa_var.txt")
  pcoa.data <- read_delim(pcoa.data.handle, delim = " ", escape_double = FALSE, trim_ws = TRUE)
  #pcoa.data <- pcoa.data %>% separate(col = Y, sep = '\t', into = c('x', 'y')) %>% rename(ID = X)
  pcoa.var <- read_delim(pcoa.var.handle, delim = " ", escape_double = FALSE, trim_ws = TRUE)
  #pcoa.data <- read_tsv(pcoa.data.handle)
  
  meta_for_beta <- meta %>% select(isolate, Country, Group, Sex, age_bracket, Antibiotics_taken_before_sampling_yes_no_assumptions, group_country, group_antibiotic)
  
  pcoa.data <- left_join(pcoa.data, meta_for_beta, by = c('Sample' = 'isolate'))

  # add some different plots to the below
  # or, change this function to take the variable to colour by
  plots <- plot_beta(pcoa.data, pcoa.var, vars_to_plot)
  return(plots)
}

1.5.1 All three countries

1.5.1.1 Plots

all_three_vars_to_plot <- c('Country', 'Group', 'Sex', 'age_bracket', 'group_country', 'group_antibiotic')
all_three_plots <- run_plot_beta(output_folder_all_three, metadata, all_three_vars_to_plot)

a3_c <- all_three_plots$country_plot
a3_c

a3_g <- all_three_plots$group_plot
a3_g

all_three_plots$sex_plot

all_three_plots$age_plot

all_three_plots$group_country_plot

all_three_plots$group_antibiotic_plot

a3_c / a3_g

1.5.1.2 PERMANOVA

pn_all3_csgaa_res <- read_delim(file.path(output_folder_all_three, '4_beta', 'permanova.results.tsv'))
pn_all3_csgaa_res %>% kbl %>% kable_styling()
variable R2 Pr..F. is_it_significant
Country:Group 0.1082441 0.0009990 significant
Country 0.0994852 0.0009990 significant
Group 0.0804962 0.0009990 significant
age_bracket 0.0246847 0.0009990 significant
Country:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0119992 0.0009990 significant
Country:Sex 0.0087463 0.0039960 significant
Country:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0080082 0.0059940 significant
Sex 0.0048251 0.0099900 significant
Country:age_bracket 0.0135873 0.0119880 not_significant
Country:Group:age_bracket 0.0112673 0.0869131 not_significant
Country:Sex:Group 0.0109128 0.1048951 not_significant
Group:age_bracket 0.0094648 0.3416583 not_significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0078747 0.1878122 not_significant
Sex:age_bracket 0.0078018 0.1958042 not_significant
Country:Sex:age_bracket 0.0074308 0.2857143 not_significant
Sex:Group:age_bracket 0.0071928 0.3376623 not_significant
Country:Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0070626 0.0189810 not_significant
Antibiotics_taken_before_sampling_yes_no_assumptions 0.0066436 0.0539461 not_significant
Sex:Group 0.0057893 0.1048951 not_significant
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0054386 0.1788212 not_significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0023788 0.3516484 not_significant
Total 1.0000000 NA NA
Residuals 0.5506660 NA NA

1.5.2 Just Blantyre

1.5.2.1 Plots

blantyre_vars_to_plot <- c('Group', 'Sex', 'age_bracket', 'group_antibiotic')
blantyre_plots <- run_plot_beta(output_folder_blantyre, metadata, all_three_vars_to_plot)
b_g <- blantyre_plots$group_plot
b_g

blantyre_plots$sex_plot

blantyre_plots$age_plot

blantyre_plots$group_antibiotic_plot

1.5.2.2 PERMANOVA

pn_blantyre_sgaa_res <- read_delim(file.path(output_folder_blantyre, '4_beta', 'permanova.results.tsv'))
pn_blantyre_sgaa_res %>% kbl %>% kable_styling()
variable R2 Pr..F. is_it_significant
Group 0.2450967 0.0009990 significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0404489 0.0009990 significant
Antibiotics_taken_before_sampling_yes_no_assumptions 0.0213950 0.0039960 significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0156828 0.0079920 significant
age_bracket 0.0248784 0.0119880 not_significant
Sex:Group 0.0195954 0.0309690 not_significant
Sex:age_bracket 0.0191798 0.0709291 not_significant
Group:age_bracket 0.0182757 0.0859141 not_significant
Sex:Group:age_bracket 0.0179867 0.0889111 not_significant
Sex 0.0095135 0.0959041 not_significant
Total 1.0000000 NA NA
Residuals 0.5679472 NA NA

1.5.3 Just Dhaka

1.5.3.1 Plots

dhaka_vars_to_plot <- c('Group', 'Sex', 'age_bracket', 'group_antibiotic')
dhaka_plots <- run_plot_beta(output_folder_dhaka, metadata, all_three_vars_to_plot)
d_g <- dhaka_plots$group_plot
d_g

dhaka_plots$sex_plot

dhaka_plots$age_plot

dhaka_plots$group_antibiotic_plot

1.5.3.2 PERMANOVA

pn_dhaka_sgaa_res <- read_delim(file.path(output_folder_dhaka, '4_beta', 'permanova.results.tsv'))
pn_dhaka_sgaa_res %>% kbl %>% kable_styling()
variable R2 Pr..F. is_it_significant
Group 0.2850528 0.0009990 significant
age_bracket 0.0378323 0.0009990 significant
Group:age_bracket 0.0228223 0.5874126 not_significant
Sex:age_bracket 0.0171320 0.5414585 not_significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0162455 0.6623377 not_significant
Sex:Group 0.0133998 0.2627373 not_significant
Sex 0.0105260 0.0489510 not_significant
Antibiotics_taken_before_sampling_yes_no_assumptions 0.0076070 0.1938062 not_significant
Sex:Group:age_bracket 0.0072387 0.2297702 not_significant
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0072303 0.2147852 not_significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0054526 0.5494505 not_significant
Total 1.0000000 NA NA
Residuals 0.5694607 NA NA

1.5.4 Just Kathmandu

1.5.4.1 Plots

kathmandu_vars_to_plot <- c('Group', 'Sex', 'age_bracket', 'group_antibiotic')
kathmandu_plots <- run_plot_beta(output_folder_kathmandu, metadata, all_three_vars_to_plot)
k_g <- kathmandu_plots$group_plot
k_g

kathmandu_plots$sex_plot

kathmandu_plots$age_plot

kathmandu_plots$group_antibiotic_plot

1.5.4.2 PERMANOVA

pn_kathmandu_sgaa_res <- read_delim(file.path(output_folder_kathmandu, '4_beta', 'permanova.results.tsv'))
pn_kathmandu_sgaa_res %>% kbl %>% kable_styling()
variable R2 Pr..F. is_it_significant
Group 0.0771814 0.0009990 significant
age_bracket 0.0333476 0.1328671 not_significant
Sex:Group 0.0288279 0.2887113 not_significant
Group:age_bracket 0.0287356 0.3116883 not_significant
Sex 0.0207446 0.0509491 not_significant
Antibiotics_taken_before_sampling_yes_no_assumptions 0.0195485 0.8801199 not_significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0125371 0.4835165 not_significant
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0120137 0.5194805 not_significant
Sex:age_bracket 0.0103350 0.7102897 not_significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0072818 0.9710290 not_significant
Total 1.0000000 NA NA
Residuals 0.7494468 NA NA

1.5.5 Blantyre and Dhaka

1.5.5.1 Plots

blantyre_dhaka_vars_to_plot <- c('Group', 'Sex', 'age_bracket', 'group_antibiotic')
blantyre_dhaka_plots <- run_plot_beta(output_folder_blantyre_dhaka, metadata, all_three_vars_to_plot)
blantyre_dhaka_plots$group_plot

blantyre_dhaka_plots$sex_plot

blantyre_dhaka_plots$age_plot

blantyre_dhaka_plots$group_antibiotic_plot

1.5.5.2 PERMANOVA

pn_blantyre_dhaka_csgaa_res <- read_delim(file.path(output_folder_blantyre_dhaka, '4_beta', 'permanova.results.tsv'))
pn_blantyre_dhaka_csgaa_res %>% kbl %>% kable_styling()
variable R2 Pr..F. significant
Group 0.1540878 0.0009990 significant
Country 0.1057172 0.0009990 significant
Country:Group 0.0785701 0.0009990 significant
age_bracket 0.0279396 0.0009990 significant
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0132086 0.0059940 significant
Country:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0095551 0.0059940 significant
Country:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0068922 0.0039960 significant
Country:Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0057642 0.0099900 significant
Group:age_bracket 0.0120929 0.2737263 not_significant
Sex:age_bracket 0.0099415 0.1378621 not_significant
Sex:Group:age_bracket 0.0094396 0.2027972 not_significant
Country:age_bracket 0.0086514 0.0339660 not_significant
Sex:Group 0.0080736 0.0459540 not_significant
Country:Group:age_bracket 0.0070425 0.1388611 not_significant
Country:Sex:Group 0.0069062 0.1448551 not_significant
Country:Sex:age_bracket 0.0067914 0.1658342 not_significant
Sex 0.0054105 0.0269730 not_significant
Antibiotics_taken_before_sampling_yes_no_assumptions 0.0045536 0.0599401 not_significant
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0037801 0.1338661 not_significant
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions 0.0033876 0.2027972 not_significant
Country:Sex 0.0033118 0.2237762 not_significant
Total 1.0000000 NA NA
Residuals 0.5088827 NA NA

1.5.6 Combined plots

It looks like some might be being trimmed by the coordinate limits in the plot_beta function, but they’re not (i’ve checked both blantyre and dhaka).

#b_g / d_g | k_g / plot_spacer() + plot_layout(guides = 'collect')
b_g + d_g + k_g + guide_area() + plot_layout(ncol = 2, guides = 'collect')

1.6 Taxa associated with participant groups

combined_output_root <- '/Users/flashton/Dropbox/GordonGroup/STRATAA_Microbiome/from_leo/phil/2022.11.08/combined_analysis'
if (!dir.exists(combined_output_root)){ dir.create(combined_output_root) }

groups_for_comparison <- c("Control_HealthySerosurvey", "Acute_Typhi")
covars <- c('Country', 'Sex', 'Age', 'Antibiotics_taken_before_sampling_yes_no_assumptions')

to_combine <- c(output_folder_all_three, output_folder_blantyre, output_folder_dhaka, output_folder_kathmandu, output_folder_blantyre_dhaka)

# the_date doesn't have to be todays date, it's just the current working dir
the_date <- '2022.11.10'
dge_output <- combine_and_compare_dges(to_combine, covars, output_folder_all_three, output_folder_blantyre, output_folder_dhaka, output_folder_kathmandu, combined_output_root, the_date, groups_for_comparison)
## [1] "Bangladesh FDR <= 0.01"
## [1] 936
## [1] "Malawi FDR <= 0.01"
## [1] 738
## [1] "Nepal FDR <= 0.01"
## [1] 7
## [1] "malawi_bangladesh FDR <= 0.01"
## [1] 975
## [1] "Jaccard of unfiltered taxa of Bang, Mal, Nep =  0.425525525525526"
#options(scipen = 999)
#options(scipen = 0) # to switch scientific notation back on

dge_output$sig_up_cases_controls %>% select(species, malawi_bangladesh_logFC, malawi_bangladesh_FDR, bangladesh_logFC, bangladesh_FDR, malawi_logFC, malawi_FDR) %>% arrange(malawi_bangladesh_FDR) %>% kbl() %>% kable_styling()
species malawi_bangladesh_logFC malawi_bangladesh_FDR bangladesh_logFC bangladesh_FDR malawi_logFC malawi_FDR
Clostridium_P perfringens 3.418064 0.00e+00 4.081502 0.0000034 3.072498 0.0000026
Paeniclostridium sordellii 3.289270 0.00e+00 2.682375 0.0002721 3.595776 0.0000068
Prevotella copri 3.215595 0.00e+00 3.252965 0.0000498 2.964184 0.0002320
Selenomonas_C sp002351185 9.923578 0.00e+00 10.206262 0.0002912 9.230450 0.0002674
Dialister sp000434475 4.177501 0.00e+00 5.436875 0.0000238 2.962639 0.0089626
Romboutsia timonensis 3.023815 1.50e-06 2.826541 0.0029203 2.901554 0.0037327
Dialister sp002320515 6.723816 1.08e-05 11.336950 0.0004670 4.639173 0.0081446
Dialister sp002297935 6.799250 6.44e-05 9.320825 0.0001399 4.091471 0.0054813
dge_output$sig_down_cases_controls %>% select(species, malawi_bangladesh_logFC, malawi_bangladesh_FDR, bangladesh_logFC, bangladesh_FDR, malawi_logFC, malawi_FDR) %>% arrange(malawi_bangladesh_FDR) %>% kbl() %>% kable_styling()
species malawi_bangladesh_logFC malawi_bangladesh_FDR bangladesh_logFC bangladesh_FDR malawi_logFC malawi_FDR
Olsenella_C umbonata -16.402044 0.0000000 -20.051264 0.0000000 -9.581691 0.0000001
KLE1796 sp001580115 -9.944016 0.0000000 -9.596562 0.0000000 -9.018228 0.0000003
F23-B02 sp003533405 -12.733131 0.0000000 -7.558436 0.0067086 -14.795353 0.0000000
Lactobacillus_B ruminis -14.997411 0.0000000 -20.923936 0.0000000 -6.240719 0.0000000
Solobacterium sp900343155 -11.372181 0.0000000 -12.170168 0.0000000 -10.199193 0.0000021
Massiliomicrobiota timonensis -9.393248 0.0000000 -9.634145 0.0000000 -8.858593 0.0000014
Clostridium_M bolteae -7.291020 0.0000000 -13.725675 0.0000000 -2.768290 0.0000000
Bacteroides_A sp000434735 -7.367197 0.0000000 -12.098871 0.0000000 -5.133698 0.0000002
NK4A136 sp000687675 -8.925917 0.0000000 -8.574760 0.0000046 -8.927376 0.0000002
Faecalicatena sp000509105 -7.423736 0.0000000 -13.264389 0.0000000 -1.433171 0.0037446
Absiella innocuum -8.426218 0.0000000 -15.003970 0.0000000 -2.045904 0.0020647
UBA7102 sp002492415 -9.964592 0.0000000 -7.492815 0.0000797 -11.503441 0.0000000
Eubacterium limosum_A -9.896316 0.0000000 -10.367409 0.0000002 -8.275325 0.0000616
F0332 sp001652275 -11.136505 0.0000000 -11.153392 0.0000004 -9.863396 0.0000055
Solobacterium extructa -9.594100 0.0000000 -9.831203 0.0000000 -8.998385 0.0017448
UBA1777 sp900321275 -10.158145 0.0000000 -8.402279 0.0035241 -11.012279 0.0000000
Pauljensenia odontolyticus -8.981532 0.0000000 -12.233399 0.0000000 -8.978208 0.0074376
Lactobacillus_H mucosae -14.070677 0.0000000 -17.176342 0.0000000 -5.787237 0.0035450
Solobacterium moorei -9.220286 0.0000000 -14.583365 0.0000000 -3.485564 0.0031044
NK3B98 sp003150485 -10.321224 0.0000000 -7.451078 0.0001947 -11.495422 0.0000000
CAG-145 sp000435615 -7.670724 0.0000000 -10.694660 0.0000000 -4.286481 0.0013487
CAG-791 sp900321785 -8.370999 0.0000000 -8.096736 0.0000293 -8.260572 0.0009183
UBA1390 sp002305315 -8.493585 0.0000000 -8.183057 0.0000515 -8.410475 0.0004686
UBA1777 sp002371675 -11.680438 0.0000000 -9.493327 0.0094971 -11.210683 0.0000000
UBA2727 sp900315505 -8.520189 0.0000000 -8.247159 0.0000485 -8.496791 0.0055841
UBA1417 sp003531055 -7.788689 0.0000000 -16.684786 0.0000000 -1.795009 0.0060732
Eisenbergiella tayi -2.253882 0.0000000 -2.286730 0.0007815 -1.720815 0.0037448
Streptococcus oralis_AE -8.387973 0.0000000 -12.247159 0.0000000 -6.154434 0.0036509
Angelakisella sp003453215 -6.529870 0.0000002 -7.570891 0.0029393 -6.087831 0.0001393
Eubacterium_E hallii_A -14.393519 0.0000002 -20.418227 0.0000000 -1.982034 0.0024501
Olsenella_E provencensis -6.428614 0.0000005 -3.597808 0.0016418 -8.908750 0.0000197
Eggerthella lenta -3.033424 0.0000006 -3.348238 0.0000273 -3.974787 0.0011601
Enteroscipio sp000270285 -10.664030 0.0000007 -9.986373 0.0016677 -9.678756 0.0000016
Eubacterium_F sp002435545 -3.661160 0.0000088 -8.004029 0.0001154 -2.521352 0.0064757
UBA1777 sp900319465 -7.605461 0.0000174 -8.115137 0.0056352 -5.681010 0.0080948
UBA6857 sp002438505 -7.444564 0.0038637 -3.443917 0.0048247 -5.673872 0.0089186
#covar_initials <- paste(str_sub(covars, 1, 6), sep = '', collapse = '')

#combined_dge_output_folder <- file.path(combined_output_root, paste(covar_initials, 'combined_edgeR'))